*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file conducts overid tests of prime points by correlating prime points 
* with SNL investments, starbucks, and coworking spaces at the level of 100k disks

* Load data generated using C++ due to computational requirements
use "$data_125cities/OVERID/overid750_data.dta",clear

* Merge world regions
	* keep(match) retains only observations present in both files and nogenerate
	* suppresses the _merge indicator, so nothing relies on abbreviating _merge.
	merge m:1 metro_id using  "$temp/metro_using.dta", keepusing(world_region) keep(match) nogenerate
	* Pool Africa and the Middle East into a single region; the "\&" is an
	* ampersand escaped for the LaTeX table written at the end of the script.
	replace world_region = "Africa \& Middle East" if inlist(world_region, "Africa", "Middle East")
	
* Encode the region string as a labelled numeric variable (WR) used to select
* one region at a time further below
	quietly encode world_region, generate(WR)

* Improve variable names
	rename (pp snl star) (primepoint_count SNL_count starbucks_count)

* Generate a grid for clustering: coordinates are rounded to the nearest 0.01
* and every distinct (rounded lat, rounded lon, metro) combination is one cluster
	foreach coord in lat lon {
		generate round_`coord' = round(`coord', 0.01)
	}
	egen cluster_cells = group(round_lat round_lon metro_id)
	
* Gen outcome: reuse the first nine rows of the data in memory as the skeleton
* of the results table. Rows 1-3 belong to coworking, rows 4-6 to SNL and rows
* 7-9 to starbucks. Within each triple the first row is flagged for the
* coefficient (coeff), the second for the standard error (flag variable p) and
* the third for the R2 (r2).
	quietly {
		generate outcome = ""
		replace outcome = "coworking" if inrange(_n, 1, 3)
		replace outcome = "SNL" if inrange(_n, 4, 6)
		replace outcome = "starbucks" if inrange(_n, 7, 9)
		generate coeff = 1 if inlist(_n, 1, 4, 7)
		generate p = 1 if inlist(_n, 2, 5, 8)
		generate r2 = 1 if inlist(_n, 3, 6, 9)
	}
	
* Run regressions: for each of the six world regions and each outcome, regress
* the outcome on the prime-point count, absorbing metro fixed effects and
* clustering on the grid cells. The coefficient, its standard error and the
* adjusted within-R2 are written into the flagged skeleton rows of the
* region-specific column WR_1 ... WR_6.
* NOTE(review): SNL and starbucks presumably resolve to SNL_count and
* starbucks_count through variable abbreviation - confirm that abbreviation is
* enabled and unambiguous wherever this script is run.
	forvalues region = 1/6 {
		quietly generate WR_`region' = .
		foreach depvar in coworking SNL starbucks {
			quietly reghdfe `depvar' primepoint_count if WR == `region', abs(metro_id) cluster(cluster_cells)
			quietly {
				* point estimate
				replace WR_`region' = _b[primepoint_count] if outcome == "`depvar'" & coeff == 1
				* standard error (stored in the rows flagged by p)
				replace WR_`region' = _se[primepoint_count] if outcome == "`depvar'" & p == 1
				* adjusted within-R2 as returned by reghdfe
				replace WR_`region' = e(r2_a_within) if outcome == "`depvar'" & r2 == 1
			}
			display "`depvar' `region'"
		}
	}

* Count metros: row 10 of the table reports, per region, the number of
* distinct metro_id values
	replace outcome = "# cities" if _n == 10
	forvalues region = 1/6 {
		display "... computing # cities..."
		quietly {
			* the one-way tabulation leaves its number of rows in r(r)
			tabulate metro_id if WR == `region'
			replace WR_`region' = r(r) if outcome == "# cities"
		}
	}

* Finalize Table: reduce the data in memory to the ten table rows, label the
* statistic shown in each row, convert the numbers to formatted strings and
* export the result as a LaTeX fragment.
	drop if _n > 10
	* Rows flagged by p carry the standard errors and are therefore kept
	gen stat = ""
	replace stat = "Coeff." if coeff == 1
	replace stat = "S.E." if p == 1
	replace stat = "R2" if r2 == 1
	keep outcome stat WR_*
	* NOTE(review): the %9.3f format is applied to every row, so the "# cities"
	* counts in row 10 are also written with three decimals.
	foreach num of numlist 1/6 {
		tostring WR_`num', replace force format(%9.3f)
	}
	order outcome stat
	replace outcome = "Coworking spaces" if outcome == "coworking"
	replace outcome = "SNL-S\&P investments" if outcome == "SNL"
	replace outcome = "Starbucks" if outcome == "starbucks"
	* Show the outcome name only on the coefficient row of each triple
	replace outcome = "" if stat == "R2"
	replace outcome = "" if stat == "S.E."
	label var outcome "Outcome"
	label var stat "Stat."
	* NOTE(review): the column labels assume that the encoded region variable
	* numbers the regions 1-6 in exactly this order - confirm against the
	* value label of WR.
	label var WR_1 "Africa \& Middle East"
	label var WR_2 "Asia"
	label var WR_3 "Australia"
	label var WR_4 "Europe"
	label var WR_5 "North America"
	label var WR_6 "South America"
	capture mkdir "$tables_App/GlobalCities"
* Save Table B3.3
	* The table has eight columns (outcome, stat and six regions), so the
	* alignment string needs eight column specifiers.
	* NOTE(review): the footnote states standardized beta coefficients, but no
	* standardization is visible in this file - confirm that the inputs are
	* standardized upstream.
	texsave * using "$tables_App/GlobalCities/Table_B3_3_125CitiesValidationCoworkingStarSNL.tex", title("Validation of big data establishments") ///
	size("footnotesize")   width(15cm) align(lccccccc)  varlabels replace  frag  footnote("Coefficients are standardized beta coefficients.")	

* Script ends	